VLink 2.0.0
A high-performance communication middleware
Loading...
Searching...
No Matches
url_parser.h
Go to the documentation of this file.
1/*
2 * Copyright (C) 2026 by Thun Lu. All rights reserved.
3 * Author: Thun Lu <thun.lu@zohomail.cn>
4 * Repo: https://github.com/thun-res/vlink
5 * _ __ __ _ __
6 * | | / / / / (_) ____ / /__
7 * | | / / / / / / / __ \ / //_/
8 * | |/ / / /___ / / / / / / / ,<
9 * |___/ /_____/ /_/ /_/ /_/ /_/|_|
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 * http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24/**
25 * @file url_parser.h
26 * @brief RFC-compliant URL/URI parser used internally by the VLink transport layer.
27 *
28 * @details
29 * @c UrlParser decomposes a URL string into its constituent components following a
30 * strict subset of RFC 3986. It is used by @c Url to extract the URI scheme
31 * (which VLink treats as the transport prefix), host, path, and query parameters
32 * from VLink topic addresses such as:
33 *
34 * @code
35 * dds://my_domain/vehicle/speed?domain_id=1&qos=best_effort
36 * intra://my_topic
37 * someip://127.0.0.1:30490/my_service?instance_id=1
38 * @endcode
39 *
40 * @par Supported URL Components
41 * | Component | Example | Description |
42 * | ---------- | --------------------------- | -------------------------------------- |
43 * | transport | @c dds | URI scheme / VLink transport prefix before @c :// (or @c : in non-hierarchical URLs) |
44 * | content | @c //host/path | Full content portion after the scheme |
45 * | username | @c user | Optional credential before host |
46 * | password | @c pass | Optional credential after @c : |
47 * | host | @c 127.0.0.1 | Hostname or IP address |
48 * | port | @c 30490 | TCP/UDP port number |
49 * | path | @c /vehicle/speed | Topic path |
50 * | query | @c domain_id=1&qos=... | Raw query string after @c ? |
51 * | fragment | @c section1 | Fragment identifier after @c # |
52 *
53 * @par Query Dictionary
54 * The query string is automatically split into a @c std::map<string,string> using
55 * either @c & (default) or @c ; as the separator between key-value pairs. Each
56 * pair is split into key and value at the first @c = character.
57 *
58 * @par Category
59 * - @c kHierarchical -- standard @c scheme://authority/path?query#fragment syntax.
60 * - @c kNonHierarchical -- opaque @c scheme:content syntax (e.g. @c mailto:user(at)host).
61 *
62 * @note @c UrlParser is a value type; it parses the URL at construction time and
63 * provides read-only accessors for each extracted component.
64 */
65
66#pragma once
67
68#include <cstdint>
69#include <map>
70#include <string>
71
72#include "../base/macros.h"
73
74namespace vlink {
75
76/**
77 * @class UrlParser
78 * @brief Immutable RFC-3986 URL parser.
79 *
80 * @details
81 * Parses the input URL string once at construction time. All accessor methods
82 * are @c const; those that return a reference refer to internally stored data,
83 * and that reference is valid only for the lifetime of the @c UrlParser object.
84 */
86 public:
87 /**
88 * @enum Category
89 * @brief Distinguishes hierarchical and non-hierarchical URL forms.
90 */
91 enum class Category : uint8_t {
92 kHierarchical = 0, ///< Standard @c scheme://authority/path form (most VLink transports).
93 kNonHierarchical = 1, ///< Opaque @c scheme:content form (e.g. @c mailto:).
94 };
95
96 /**
97 * @enum Component
98 * @brief Identifies individual URL components for the components-map constructor.
99 */
100 enum class Component : uint8_t {
101 kTransport = 0, ///< URI scheme / VLink transport prefix (e.g. @c dds, @c intra).
102 kContent = 1, ///< Full content string after the scheme separator.
103 kUsername = 2, ///< Optional authentication username.
104 kPassword = 3, ///< Optional authentication password.
105 kHost = 4, ///< Hostname or IP address.
106 kPort = 5, ///< Port number (stored as string in the components map).
107 kPath = 6, ///< Resource path (e.g. @c /vehicle/speed).
108 kQuery = 7, ///< Raw query string (without the leading @c ?).
109 kFragment = 8, ///< Fragment identifier (without the leading @c #).
110 };
111
112 /**
113 * @enum Separator
114 * @brief Query-string key-value pair delimiter.
115 */
116 enum class Separator : uint8_t {
117 kAmpersand = 0, ///< @c & separator (default; @c key=val&key2=val2).
118 kSemicolon = 1, ///< @c ; separator (alternative; @c key=val;key2=val2).
119 };
120
121 /**
122 * @brief Constructs a parser by parsing the given C-string URL.
123 *
124 * @param str Null-terminated URL string to parse.
125 * @param category Hierarchical or non-hierarchical form; default hierarchical.
126 * @param separator Query key-value delimiter; default ampersand (@c &).
127 */
128 explicit UrlParser(const char* str, Category category = Category::kHierarchical,
129 Separator separator = Separator::kAmpersand);
130
131 /**
132 * @brief Constructs a parser by parsing the given @c std::string URL.
133 *
134 * @param str URL string to parse.
135 * @param category Hierarchical or non-hierarchical form; default hierarchical.
136 * @param separator Query key-value delimiter; default ampersand (@c &).
137 */
138 explicit UrlParser(const std::string& str, Category category = Category::kHierarchical,
139 Separator separator = Separator::kAmpersand);
140
141 /**
142 * @brief Constructs a parser from an explicit component map.
143 *
144 * @details
145 * Builds the internal state from a pre-decomposed set of components rather than
146 * parsing a raw URL string. Useful when constructing a modified URL from an
147 * existing @c UrlParser instance.
148 *
149 * @param components Map of @c Component to string value for each component present.
150 * @param category Hierarchical or non-hierarchical form.
151 * @param rooted @c true if the path begins with @c / (hierarchical URLs only).
152 * @param separator Query key-value delimiter; default ampersand.
153 */
154 explicit UrlParser(const std::map<Component, std::string>& components, Category category, bool rooted,
155 Separator separator = Separator::kAmpersand);
156
157 /**
158 * @brief Constructs a parser by copying @p other and overriding specific components.
159 *
160 * @details
161 * Copies all components from @p other, then replaces those present in
162 * @p replacements. Equivalent to creating a modified copy of an existing URL.
163 *
164 * @param other Source @c UrlParser to copy from.
165 * @param replacements Components to override in the copy.
166 */
167 explicit UrlParser(const UrlParser& other, const std::map<Component, std::string>& replacements);
168
169 /**
170 * @brief Returns the transport string parsed from the URL (e.g. @c "dds", @c "intra").
171 *
172 * @return Reference to the parsed transport component; empty if not present.
173 */
174 [[nodiscard]] const std::string& get_transport() const;
175
176 /**
177 * @brief Returns the URL category (hierarchical or non-hierarchical).
178 *
179 * @return The @c Category value supplied at construction.
180 */
181 [[nodiscard]] Category get_category() const;
182
183 /**
184 * @brief Returns the full content portion of the URL (after the scheme separator).
185 *
186 * @return Reference to the parsed content string; empty if not present.
187 */
188 [[nodiscard]] const std::string& get_content() const;
189
190 /**
191 * @brief Returns the authentication username component.
192 *
193 * @return Reference to the parsed username; empty if not present.
194 */
195 [[nodiscard]] const std::string& get_username() const;
196
197 /**
198 * @brief Returns the authentication password component.
199 *
200 * @return Reference to the parsed password; empty if not present.
201 */
202 [[nodiscard]] const std::string& get_password() const;
203
204 /**
205 * @brief Returns the host component (hostname or IP address).
206 *
207 * @return Reference to the parsed host string; empty if not present.
208 */
209 [[nodiscard]] const std::string& get_host() const;
210
211 /**
212 * @brief Returns the port number, or @c 0 if no port was specified.
213 *
214 * @return Parsed port as @c int64_t; @c 0 if absent.
215 */
216 [[nodiscard]] int64_t get_port() const;
217
218 /**
219 * @brief Returns the path component of the URL.
220 *
221 * @return Reference to the parsed path string; empty if not present.
222 */
223 [[nodiscard]] const std::string& get_path() const;
224
225 /**
226 * @brief Returns the raw query string (without the leading @c ? character).
227 *
228 * @return Reference to the raw query; empty if no query was present.
229 */
230 [[nodiscard]] const std::string& get_query() const;
231
232 /**
233 * @brief Returns the parsed query string as a key-value dictionary.
234 *
235 * @details
236 * Built by splitting the raw query on the configured @c Separator and then
237 * splitting each token on the first @c = character. Keys without a @c =
238 * are stored with an empty-string value.
239 *
240 * @return Reference to the @c std::map<string,string> query dictionary.
241 */
242 [[nodiscard]] const std::map<std::string, std::string>& get_query_dictionary() const;
243
244 /**
245 * @brief Returns the fragment identifier component (without the leading @c #).
246 *
247 * @return Reference to the parsed fragment; empty if not present.
248 */
249 [[nodiscard]] const std::string& get_fragment() const;
250
251 /**
252 * @brief Reconstructs the URL as a canonical string from parsed components.
253 *
254 * @details
255 * Re-assembles the parsed URI into a canonical string. Hierarchical URLs are
256 * emitted as @c scheme://authority/path?query#fragment, while non-hierarchical
257 * URLs are emitted as @c scheme:content?query#fragment. The output may differ
258 * slightly from the original input if the input had unusual whitespace or
259 * encoding.
260 *
261 * @return Reconstructed URL string.
262 */
263 [[nodiscard]] std::string to_string() const;
264
265 private:
266 void setup(const std::string& str, Category category);
267
268 std::string::const_iterator parse_transport(const std::string& str, std::string::const_iterator transport_start);
269
270 std::string::const_iterator parse_content(const std::string& str, std::string::const_iterator content_start);
271
272 std::string::const_iterator parse_username(const std::string& str, const std::string& content,
273 std::string::const_iterator username_start);
274
275 std::string::const_iterator parse_password(const std::string& str, const std::string& content,
276 std::string::const_iterator password_start);
277
278 std::string::const_iterator parse_host(const std::string& str, const std::string& content,
279 std::string::const_iterator host_start);
280
281 std::string::const_iterator parse_port(const std::string& str, const std::string& content,
282 std::string::const_iterator port_start);
283
284 std::string::const_iterator parse_query(const std::string& str, std::string::const_iterator query_start);
285
286 std::string::const_iterator parse_fragment(const std::string& str, std::string::const_iterator fragment_start);
287
288 void init_query_dictionary();
289
290 std::string transport_;
291 std::string content_;
292 std::string username_;
293 std::string password_;
294 std::string host_;
295 std::string path_;
296 std::string query_;
297 std::string fragment_;
298 std::map<std::string, std::string> query_dict_;
300 int64_t port_{0};
301 bool is_rooted_{false};
302 Separator separator_{Separator::kAmpersand};
303};
304
305} // namespace vlink
Platform-independent macro definitions for the VLink library.
#define VLINK_EXPORT
Definition macros.h:85